# 汇编程序编译器 将汇编指令编译为我的CPU里指令
# 指令存储格式 8位指令 后跟操作数

import os
import re
from cpu import asm as ASM
from cpu import pin

# Input and output files live next to this script: program.asm is the
# assembly source and program.bin is the compiled image.
my_dir = os.path.dirname(__file__)
src_name, dst_name = (
    os.path.join(my_dir, file_name)
    for file_name in ('program.asm', 'program.bin')
)
# Matches a line containing ';' and captures the text before the first ';'.
annotation = re.compile(r"(.*?);.*")
# Parsed source lines (instructions and labels), in file order.
codes = list()
# Label table: label name -> the instruction the label points at.
marks = dict()

# Supported instructions, grouped by operand count.
# Two-operand instructions (destination, source).
OP2 = dict(
    MOV=ASM.MOV,
    ADD=ASM.ADD,
    SUB=ASM.SUB,
    CMP=ASM.CMP,
    AND=ASM.AND,
    OR=ASM.OR,
    XOR=ASM.XOR,
)

# One-operand instructions.
OP1 = dict(
    INC=ASM.INC,
    DEC=ASM.DEC,
    NOT=ASM.NOT,
    JMP=ASM.JMP,
    JO=ASM.JO,
    JNO=ASM.JNO,
    JZ=ASM.JZ,
    JNZ=ASM.JNZ,
    JP=ASM.JP,
    JNP=ASM.JNP,
    PUSH=ASM.PUSH,
    POP=ASM.POP,
    CALL=ASM.CALL,
    INT=ASM.INT,
)

# Instructions that take no operand.
OP0 = dict(
    NOP=ASM.NOP,
    RET=ASM.RET,
    IRET=ASM.IRET,
    STI=ASM.STI,
    CLI=ASM.CLI,
    HLT=ASM.HLT,
)

# Opcode values of each group, for membership tests on an encoded opcode.
OP2SET = {*OP2.values()}
OP1SET = {*OP1.values()}
OP0SET = {*OP0.values()}

# Registers that may appear as operands: mnemonic -> register id from `pin`.
REGISTERS = dict(
    A=pin.A,
    B=pin.B,
    C=pin.C,
    D=pin.D,
    SS=pin.SS,
    SP=pin.SP,
)

# One parsed line of assembly source.
class Code:
    """A single source line, parsed into either a label or an instruction.

    Attributes:
        number: line number used in diagnostics (see ``__repr__``).
        source: the line text, stripped and upper-cased.
        op / dst / src: mnemonic, destination operand and source operand
            text for an instruction line (``None`` when absent).
        type: ``TYPE_CODE`` or ``TYPE_LABEL``.
        name: label name for a label line, otherwise ``None``.
        index: position among the instructions; assigned by the caller.
    """

    # Line kinds
    TYPE_CODE = 1
    TYPE_LABEL = 2

    def __init__(self, number, source):
        self.number = number
        # Strip surrounding whitespace: text left in front of a removed
        # comment ("INC A ; note" -> "INC A ") must still parse, and a label
        # followed by blanks must still end with ':'.
        self.source = source.strip().upper()
        self.op = None
        self.dst = None
        self.src = None
        self.type = self.TYPE_CODE  # an instruction unless proven a label
        self.name = None
        self.index = 0
        # Split the text into its fields right away.
        self.prepare_source()

    def prepare_source(self):
        """Fill in ``name`` for a label, or ``op`` / ``dst`` / ``src``.

        Raises:
            SyntaxError: the line has more than one comma, or more than one
                operand in front of the comma.
        """
        # A line ending with a colon is a label.
        if self.source.endswith(':'):
            self.type = self.TYPE_LABEL
            self.name = self.source.strip(':').strip()
            return
        parts = self.source.split(',')
        if len(parts) > 2:
            raise SyntaxError(self)
        if len(parts) == 2:
            # Text after the comma is the source operand.
            self.src = parts[1].strip()
        # split() without arguments separates on any run of whitespace
        # (spaces or tabs) and ignores leading/trailing blanks, so
        # "MOV A , B" and "MOV\tA, B" are both accepted.
        tokens = parts[0].split()
        if len(tokens) > 2:
            raise SyntaxError(self)
        if len(tokens) == 2:
            # Second token is the destination operand.
            self.dst = tokens[1]
        # First token is the mnemonic; empty when the line starts with ','
        # (rejected later by get_op).
        self.op = tokens[0] if tokens else ''

    def get_op(self):
        """Return the opcode value for ``self.op``.

        Raises:
            CodeSyntaxError: the mnemonic is in none of OP2 / OP1 / OP0.
        """
        for table in (OP2, OP1, OP0):
            if self.op in table:
                return table[self.op]
        print(f"不支持的OP {self.op} ")
        raise CodeSyntaxError(self)

    def get_am(self, addr):
        """Translate operand text into ``(addressing_mode, value)``.

        Returns ``(None, None)`` for a missing or empty operand.

        Raises:
            CodeSyntaxError: the operand matches no supported form.
        """
        # A missing operand is checked first so that it can never be
        # resolved through the label table.
        if not addr:
            return None, None
        # Label: immediate value holding the target's byte offset. Each
        # instruction is emitted as three values (see compile_code).
        if addr in marks:
            return pin.AM_INS, marks[addr].index * 3
        # MOV A, B   -- register
        if addr in REGISTERS:
            return pin.AM_REG, REGISTERS[addr]
        # MOV A, 5   -- decimal immediate
        if re.match(r'^[0-9]+$', addr):
            return pin.AM_INS, int(addr)
        # MOV A, 0X5F   -- hexadecimal immediate (source is upper-cased)
        if re.match(r'^0X[0-9A-F]+$', addr):
            return pin.AM_INS, int(addr, 16)
        # MOV A, [5]   -- direct address, decimal
        match = re.match(r'^\[([0-9]+)\]$', addr)
        if match:
            return pin.AM_DIR, int(match.group(1))
        # MOV A, [0X13]   -- direct address, hexadecimal
        match = re.match(r'^\[(0X[0-9A-F]+)\]$', addr)
        if match:
            return pin.AM_DIR, int(match.group(1), 16)
        # MOV A, [B]   -- address held in a register
        match = re.match(r'^\[(.+)\]$', addr)
        if match and match.group(1) in REGISTERS:
            return pin.AM_RAM, REGISTERS[match.group(1)]

        print(f"不支持的指令 {self.source}")
        raise CodeSyntaxError(self)

    def __repr__(self):
        return f'[{self.number}] - {self.source}'

    def compile_code(self):
        """Encode this instruction as ``[ir, dst, src]``.

        Raises:
            CodeSyntaxError: unknown mnemonic or operand, or an
                opcode/operand combination that ASM.INS_SUPPORT_LIST does
                not allow.
        """
        op = self.get_op()
        amd, dst = self.get_am(self.dst)
        ams, src = self.get_am(self.src)

        # A source operand without a destination ("MOV , 5") is malformed.
        if dst is None and src is not None:
            raise CodeSyntaxError(self)

        # ASM.INS_SUPPORT_LIST is indexed by operand count and, for one and
        # two operands, by opcode (its definition lives in cpu.asm). An
        # opcode missing from the table for this operand count -- e.g.
        # "INC A, B" -- is reported as a syntax error, not a raw KeyError.
        try:
            if src is not None:
                supported = (amd, ams) in ASM.INS_SUPPORT_LIST[2][op]
            elif dst is not None:
                supported = amd in ASM.INS_SUPPORT_LIST[1][op]
            else:
                supported = op in ASM.INS_SUPPORT_LIST[0]
        except LookupError:
            supported = False
        if not supported:
            raise CodeSyntaxError(self)

        amd = amd or 0
        ams = ams or 0
        dst = dst or 0
        src = src or 0

        # Assemble the instruction byte:
        #   two operands: 4-bit opcode | 2-bit dst mode | 2-bit src mode
        #   one operand:  6-bit opcode | 2-bit dst mode
        #   no operand:   8-bit opcode
        # The layout is chosen by the table the mnemonic was found in, which
        # is the same decision get_op made.
        if self.op in OP2:
            ir = op | amd << 2 | ams
        elif self.op in OP1:
            ir = op | amd
        else:
            ir = op
        # Instruction byte, destination value, source value.
        return [ir, dst, src]

# Syntax error in a source line
class CodeSyntaxError(Exception):
    """Raised when a source line cannot be assembled.

    Attributes:
        code: the offending ``Code`` object.
    """

    def __init__(self, code: "Code", *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.code = code

    def __str__(self):
        # Without this the message would be empty, because ``code`` is not
        # part of ``args``; tracebacks and print(e) now show the line.
        detail = super().__str__()
        return f"{self.code}: {detail}" if detail else f"{self.code}"

def compile_program():
    """Assemble the file at ``src_name`` and write the image to ``dst_name``.

    Steps: read the source, drop comments and blank lines, parse every
    remaining line into a ``Code``, append a final HLT, resolve labels,
    compile each instruction and write the resulting bytes.

    The module-level ``codes`` and ``marks`` are rebuilt on every call. The
    output file is written only after every instruction compiled, so a
    failing build does not leave a truncated image behind.

    Raises:
        SyntaxError / CodeSyntaxError: a line cannot be parsed or compiled.
        OverflowError: an emitted value does not fit in one byte.
    """
    # Start clean so that a second call does not re-emit earlier lines.
    codes.clear()
    marks.clear()

    # Read the source file.
    with open(src_name, encoding="utf-8") as file:
        lines = file.readlines()

    # Parse line by line.
    for number, line in enumerate(lines, start=1):
        # Cut the ';' comment first, then strip, so blanks between the
        # statement and the comment do not reach the parser.
        source = line.partition(';')[0].strip()
        if not source:
            continue
        codes.append(Code(number, source))
    # Always end the program with a halt instruction. Using len(lines)
    # keeps this working for an empty source file.
    codes.append(Code(len(lines) + 1, "HLT"))

    # Walk backwards so each label binds to the instruction that follows it.
    instructions = []
    following = None
    for code in reversed(codes):
        if code.type == Code.TYPE_CODE:
            following = code
            instructions.append(code)
        elif code.type == Code.TYPE_LABEL:
            marks[code.name] = following
        else:
            raise SyntaxError(code)
    instructions.reverse()

    # Number the instructions; label addresses are derived from this index.
    for index, code in enumerate(instructions):
        code.index = index

    # Compile everything into memory, one byte per emitted value.
    image = bytearray()
    for code in instructions:
        values = code.compile_code()
        print(f'{code} , {values}')
        for value in values:
            image += value.to_bytes(1, byteorder='little')

    # Write program.bin in one go.
    with open(dst_name, 'wb') as file:
        file.write(image)


def main():
    """Script entry point: assemble the program, then report success.

    Errors raised by compile_program() are not caught here; they propagate
    to the caller and the success message is not printed.
    """
    compile_program()
    print("程序编译成功")


# Run the assembler only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
